In-class exercise 08

Learning network analysis. Networks are everywhere: the term can refer to road maps, urban planning of bus nodes, inter- and intra-zonal transport, shipping routes, air flight routes, sewerage and water lines, and social networks.

Frostbear https://sg.linkedin.com/in/farahfoo (SMU Masters in IT business (Fintech and Analytics))https://scis.smu.edu.sg/master-it-business
2022-03-12

Network Metrics

The network is built using statistics that determine the number of nodes, the complexity, the visual clustering and the closeness of the nodes. Betweenness refers to the importance of a node (concentration risk).

Closeness identifies which node is the most central, i.e. the node that requires the fewest steps to travel from one end of the network to the other.

Examples of network

social network by Prof Kam

Singapore flight by Prof Kam The centrality index is based on the number of flights from one airport to another airport.

Project management network by Prof Kam

Organising the layouts of the network

Customisation of parts of the visualisation to represent

Node size, Label, Shapes, Images, Border colour, Weight of line, line colour, line type all can be customised to value-add to the network visualisation.

Network visualisation basic attributes

In-class exercise for Network using ggraph and tidygraph

Loading required packages

tidygraph helps to build up the statistics for the network graph. visNetwork is JavaScript-based and provides interactivity for the visualisation. igraph is an older package; tidygraph should be used in its place. lubridate handles dates and clock handles times.

# Packages needed for the network exercise: graph data structures
# (tidygraph), static plots (ggraph), interactive plots (visNetwork),
# date/time handling (lubridate, clock) and general wrangling (tidyverse).
packages <- c("tidygraph",
              "ggraph", "visNetwork",
              "lubridate", "clock",
              "tidyverse")

# Install any package that is not yet available, then attach it.
for (p in packages) {
  # requireNamespace() only tests availability; attaching is left to
  # library() below, which errors loudly if the install did not succeed.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  # TRUE is spelled out because T is an ordinary, reassignable variable.
  library(p, character.only = TRUE)
}

Importing data files and preparing

# Load the node table and the edge table from the local data folder.
GAStech_nodes <- read_csv(file = "data/GAStech_email_node.csv")
GAStech_edges <- read_csv(file = "data/GAStech_email_edge-v2.csv")

# Inspect the column names and types of the edge table.
GAStech_edges %>% glimpse()
Rows: 9,063
Columns: 8
$ source      <dbl> 43, 43, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, ~
$ target      <dbl> 41, 40, 51, 52, 53, 45, 44, 46, 48, 49, 47, 54, ~
$ SentDate    <chr> "6/1/2014", "6/1/2014", "6/1/2014", "6/1/2014", ~
$ SentTime    <time> 08:39:00, 08:39:00, 08:58:00, 08:58:00, 08:58:0~
$ Subject     <chr> "GT-SeismicProcessorPro Bug Report", "GT-Seismic~
$ MainSubject <chr> "Work related", "Work related", "Work related", ~
$ sourceLabel <chr> "Sven.Flecha", "Sven.Flecha", "Kanon.Herrero", "~
$ targetLabel <chr> "Isak.Baza", "Lucas.Alcazar", "Felix.Resumir", "~
# Parse SentDate (day/month/year text) into a Date, then derive the full
# weekday name from it as an ordered factor.
GAStech_edges <- GAStech_edges %>%
  mutate(
    SentDate = dmy(SentDate),
    Weekday = wday(SentDate, label = TRUE, abbr = FALSE)
  )

# Check that SentDate is now a date column and that Weekday was appended.
head(GAStech_edges, n = 3)
# A tibble: 3 x 9
  source target SentDate   SentTime Subject    MainSubject sourceLabel
   <dbl>  <dbl> <date>     <time>   <chr>      <chr>       <chr>      
1     43     41 2014-01-06 08:39    GT-Seismi~ Work relat~ Sven.Flecha
2     43     40 2014-01-06 08:39    GT-Seismi~ Work relat~ Sven.Flecha
3     44     51 2014-01-06 08:58    Inspectio~ Work relat~ Kanon.Herr~
# ... with 2 more variables: targetLabel <chr>, Weekday <ord>

The Weekday has also been extracted and stored as a factor with ordered levels (ordinal scale).

# Count work-related emails per sender/receiver pair and weekday.
# Self-addressed emails are dropped, and only pairs that exchanged more
# than one email on a given weekday are kept.
GAStech_edges_aggregated <- GAStech_edges %>%
  filter(MainSubject == "Work related") %>%
  group_by(source, target, Weekday) %>%
  summarise(Weight = n()) %>%
  filter(
    source != target,
    Weight > 1
  ) %>%
  ungroup()

GAStech_edges_aggregated %>% glimpse()
Rows: 1,456
Columns: 4
$ source  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
$ target  <dbl> 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6~
$ Weekday <ord> Monday, Tuesday, Wednesday, Friday, Monday, Tuesday,~
$ Weight  <int> 4, 3, 5, 8, 4, 3, 5, 8, 4, 3, 5, 8, 4, 3, 5, 8, 4, 3~

Graphing the dataset prepared

# Combine the node table and the aggregated edge table into a tidygraph
# object; directed = TRUE keeps the sender -> receiver direction.
GAStech_graph <- tbl_graph(
  edges = GAStech_edges_aggregated,
  nodes = GAStech_nodes,
  directed = TRUE
)

# Show a summary of the graph (node and edge tables).
print(GAStech_graph)
# A tbl_graph: 54 nodes and 1456 edges
#
# A directed multigraph with 1 component
#
# Node Data: 54 x 4 (active)
     id label               Department     Title                      
  <dbl> <chr>               <chr>          <chr>                      
1     1 Mat.Bramar          Administration Assistant to CEO           
2     2 Anda.Ribera         Administration Assistant to CFO           
3     3 Rachel.Pantanal     Administration Assistant to CIO           
4     4 Linda.Lagos         Administration Assistant to COO           
5     5 Ruscella.Mies.Haber Administration Assistant to Engineering G~
6     6 Carla.Forluniau     Administration Assistant to IT Group Mana~
# ... with 48 more rows
#
# Edge Data: 1,456 x 4
   from    to Weekday   Weight
  <int> <int> <ord>      <int>
1     1     2 Monday         4
2     1     2 Tuesday        3
3     1     2 Wednesday      5
# ... with 1,453 more rows
# Make the edge table the active one and list the heaviest edges first.
arrange(
  activate(GAStech_graph, edges),
  desc(Weight)
)
# A tbl_graph: 54 nodes and 1456 edges
#
# A directed multigraph with 1 component
#
# Edge Data: 1,456 x 4 (active)
   from    to Weekday Weight
  <int> <int> <ord>    <int>
1    40    41 Tuesday     23
2    40    43 Tuesday     19
3    41    43 Tuesday     15
4    41    40 Tuesday     14
5    42    41 Tuesday     13
6    42    40 Tuesday     12
# ... with 1,450 more rows
#
# Node Data: 54 x 4
     id label           Department     Title           
  <dbl> <chr>           <chr>          <chr>           
1     1 Mat.Bramar      Administration Assistant to CEO
2     2 Anda.Ribera     Administration Assistant to CFO
3     3 Rachel.Pantanal Administration Assistant to CIO
# ... with 51 more rows
# Minimal network plot: straight edges with a point for every node,
# using the default layout.
GAStech_graph %>%
  ggraph() +
  geom_edge_link() +
  geom_node_point()

# Network plot with grey edges and nodes on a dark background.
# The colours are fixed values, so they are passed as parameters rather
# than inside aes(): a constant inside aes() is treated as data, which
# yields a default palette colour and a spurious legend instead of grey.
g <- ggraph(GAStech_graph) +
  geom_edge_link(edge_colour = "grey50") +
  geom_node_point(colour = "grey40")

g + theme_graph(background = "grey10",
                text_colour = "white")

Playing with graph layouts.

# Same plot, positioned with the "fr" layout.
g <- GAStech_graph %>%
  ggraph(layout = "fr") +
  geom_edge_link() +
  geom_node_point()

g + theme_graph()

Colouring the nodes by department

# Colour each node by its department.
# Department is data, so it is mapped inside aes(); the point size is a
# fixed value, so it is set outside aes() to avoid a meaningless size
# legend.
g <- ggraph(GAStech_graph,
            layout = "nicely") +
  geom_edge_link() +
  geom_node_point(aes(colour = Department),
                  size = 3)

g + theme_graph()

Using the thickness of lines to show the weight variable

# Encode the email count (Weight) as edge thickness.
g <- ggraph(GAStech_graph,
            layout = "nicely") +
  geom_edge_link(aes(width = Weight),
                 alpha = 0.2) +
  # range sets the minimum and maximum drawn edge width
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(aes(colour = Department),
                  size = 3)

# theme_graph() is a complete theme and resets earlier theme() tweaks,
# so the legend position has to be set after it, not before.
g +
  theme_graph() +
  theme(legend.position = "bottom")

Faceting edges (weekday). You can only facet by a variable that is available in the table being faceted: Weekday is an edge variable, so it is used here; Department is a node variable and is used in the next chart.

# Base plot: edge width shows Weight, node colour shows Department.
g <- GAStech_graph %>%
  ggraph(layout = "nicely") +
  geom_edge_link(
    aes(width = Weight),
    alpha = 0.2
  ) +
  # range = c(min, max) of the drawn edge width
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(
    aes(colour = Department),
    size = 3
  )

# One panel per weekday (an edge variable), each with a grey border,
# and the legend placed underneath.
g +
  facet_edges(~Weekday) +
  th_foreground(foreground = "grey80", border = TRUE) +
  theme(legend.position = 'bottom')

Faceting nodes (department). You can only facet by a variable that is available in the table being faceted: Weekday is an edge variable (previous chart), while Department is a node variable (this chart).

# Base plot: edge width shows Weight, node colour shows Department.
g <- GAStech_graph %>%
  ggraph(layout = "nicely") +
  geom_edge_link(
    aes(width = Weight),
    alpha = 0.2
  ) +
  # range = c(min, max) of the drawn edge width
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(
    aes(colour = Department),
    size = 3
  )

# One panel per department (a node variable), each with a grey border,
# and the legend placed underneath.
g +
  facet_nodes(~Department) +
  th_foreground(foreground = "grey80", border = TRUE) +
  theme(legend.position = 'bottom')

Sizing nodes by betweenness centrality. The centrality score is computed for each node with tidygraph and then mapped to the size of the node point.

# Add a betweenness-centrality column to the node table and use it to
# scale the node points; node colour still shows the department.
g <- GAStech_graph %>%
  activate(nodes) %>%
  mutate(betweenness_centrality = centrality_betweenness()) %>%
  ggraph(layout = "fr") +
  geom_edge_link(
    aes(width = Weight),
    alpha = 0.2
  ) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(
    aes(
      colour = Department,
      size = betweenness_centrality
    )
  )

g + theme_graph()

Visualising Community using the same colour / node point

# Assign every node to a community with the edge-betweenness grouping
# (weighted by Weight, respecting edge direction) and colour the nodes
# by that community. as.factor() makes the colour scale discrete.
g <- GAStech_graph %>%
  activate(nodes) %>%
  mutate(
    community = as.factor(
      group_edge_betweenness(weights = Weight, directed = TRUE)
    )
  ) %>%
  ggraph(layout = "fr") +
  geom_edge_link(
    aes(width = Weight),
    alpha = 0.2
  ) +
  scale_edge_width(range = c(0.1, 5)) +
  geom_node_point(aes(colour = community))

g + theme_graph()

Building Interactive Network Graph with visNetwork

Creating a new aggregate table that contains the "from" and "to" fields. visNetwork has a rigid data structure: it only recognises edge columns named "from" and "to", so the data columns need to be renamed accordingly.

# Rebuild the edge table in the shape visNetwork expects: the sender and
# receiver labels are looked up in the node table and their ids become
# the `from` and `to` columns. Work-related emails are then counted per
# pair; self-addressed emails and pairs with a single email are dropped.
GAStech_edges_aggregated <- GAStech_edges %>%
  left_join(GAStech_nodes, by = c("sourceLabel" = "label")) %>%
  rename(from = id) %>%
  left_join(GAStech_nodes, by = c("targetLabel" = "label")) %>%
  rename(to = id) %>%
  filter(MainSubject == "Work related") %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  filter(
    from != to,
    weight > 1
  ) %>%
  ungroup()

GAStech_edges_aggregated %>% glimpse()
Rows: 839
Columns: 3
$ from   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,~
$ to     <dbl> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1~
$ weight <int> 21, 21, 21, 21, 21, 21, 15, 15, 15, 15, 15, 15, 15, 1~
# Interactive network with visNetwork's default layout.
visNetwork(
  nodes = GAStech_nodes,
  edges = GAStech_edges_aggregated
)

# Same network, with node positions taken from the "layout_with_fr"
# igraph layout.
GAStech_nodes %>%
  visNetwork(edges = GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr")
# visNetwork colours nodes by a column named `group`, so Department is
# renamed to match.
GAStech_nodes <- GAStech_nodes %>%
  rename(group = Department)

# Grouped network with a legend.
# The node coordinates come from the igraph layout, so the seed is passed
# to visIgraphLayout() as well; the visLayout() seed alone does not make
# the igraph layout reproducible.
visNetwork(GAStech_nodes,
           GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr",
                  randomSeed = 123) %>%
  visLegend() %>%
  visLayout(randomSeed = 123)

# Same network with arrow heads on the receiving end and clockwise-curved
# edges.
visNetwork(GAStech_nodes,
           GAStech_edges_aggregated) %>%
  visIgraphLayout(layout = "layout_with_fr",
                  randomSeed = 123) %>%
  visEdges(arrows = "to",
           smooth = list(enabled = TRUE,
                         type = "curvedCW")) %>%
  visLegend() %>%
  visLayout(randomSeed = 123)

Visualising Flows Between Entities: Chord Diagram method

# Packages for the chord diagram section.
# NOTE: chorddiag is not distributed on CRAN, so install.packages() cannot
# provide it; it has to be installed from GitHub beforehand, otherwise
# library() below stops with an error for that package.
packages <- c("circlize",
              "chorddiag")

# Install any package that is not yet available, then attach it.
for (p in packages) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  # TRUE is spelled out because T is an ordinary, reassignable variable.
  library(p, character.only = TRUE)
}

# Load the bilateral migration data from the local data folder.
mig_data <- read_csv("data/bilateral_migration2017.csv")

Run devtools::install_github("mattflor/chorddiag") to install chorddiag, as it is not readily available on CRAN.

Continue from notes…